{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "import time\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# emaotai.cn product-review page (query: ProductId=9, page=2, size=20).\n",
    "url = (\n",
    "    'https://www.emaotai.cn/Wapshop/ProductReview.aspx'\n",
    "    '?ProductId=9&page=2&size=20'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Request headers passed to every requests.get call in this notebook.\n",
    "headers = {'User-Agent': 'Mozilla/5.0'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Scrape parameters for the JD comment endpoint; edit these to target\n",
    "# another product or a different page range.\n",
    "JD_PRODUCT_ID = 251837  # productId query parameter\n",
    "JD_PAGE_COUNT = 100     # pages requested: page=0 .. JD_PAGE_COUNT - 1\n",
    "JD_PAGE_SIZE = 10       # pageSize query parameter\n",
    "\n",
    "# Placeholders, in order: productId, page, pageSize.\n",
    "JD_COMMENT_URL = (\n",
    "    'https://club.jd.com/comment/productPageComments.action'\n",
    "    '?productId=%d&score=0&sortType=6&page=%d&pageSize=%d'\n",
    ")\n",
    "\n",
    "# One URL per comment page.\n",
    "jd_comment_urls = [\n",
    "    JD_COMMENT_URL % (JD_PRODUCT_ID, page, JD_PAGE_SIZE)\n",
    "    for page in range(JD_PAGE_COUNT)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Probe the review page once and keep the response in `r` for inspection.\n",
    "# Without a timeout, requests can wait on an unresponsive server\n",
    "# indefinitely, so cap the wait.\n",
    "r = requests.get(url, headers=headers, timeout=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\r\\n<!doctype html>\\r\\n<html lang=\"zh-CN\">\\r\\n<head>\\r\\n    <!--<script src=\\'//kefu.easemob.com/webim/easemob.js?tenantId=20718&hide=false&sat=false\\' async=\\'async\\'></script>-->\\r\\n    <title>登录 - 茅台商城</title>\\r\\n\\r\\n    <meta content=\"width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=0;\"\\r\\n          name=\"viewport\" />\\r\\n    <meta name=\"format-detection\" content=\"telephone=no\" />\\r\\n    <meta name=\"apple-mobile-web-app-capable\" content=\"yes\">\\r\\n    <meta name=\"apple-mobile-web-app-status-bar-style\" content=\"black\">\\r\\n    <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\\r\\n    <link rel=\"stylesheet\" href=\"/Utility/bootflat/css/bootstrap.min.css\" rev=\"stylesheet\"\\r\\n          type=\"text/css\">\\r\\n    <link rel=\"stylesheet\" href=\"/Utility/bootflat/bootflat/css/bootflat.min.css\" rev=\"stylesheet\"\\r\\n          type=\"text/css\">\\r\\n    <link rel=\"stylesheet\" href=\"/Utility/icomoon/style.css\" rev=\"stylesheet\" type=\"text/css\">\\r\\n    <link rel=\"stylesheet\" href=\"/templates/vshop/default/style/css.css\" rev=\"stylesheet\" type=\"text/css\">\\r\\n    <link rel=\"stylesheet\" href=\"/templates/vshop/default/style/main.css\" rev=\"stylesheet\" type=\"text/css\">\\r\\n    <script src=\"/templates/vshop/default/script/jquery-1.11.0.min.js\"></script>\\r\\n    <script src=\"/Utility/bootflat/js/bootstrap.min.js\"></script>\\r\\n    <script src=\"/templates/vshop/default/script/jquery.slides.min.js\"></script>\\r\\n    <script src=\"/Utility/common.js\" type=\"text/javascript\"></script>\\r\\n    <script src=\"/templates/vshop/default/script/main.js\"></script>\\r\\n    \\r\\n    <script>\\r\\n        $(function () {\\r\\n           //location.href = \"/weihu/sjshengji.html\";\\r\\n\\r\\n        });\\r\\n    </script>\\r\\n    <style>\\r\\n       iframe\\r\\n\\t{\\r\\n\\t   margin-bottom:45px;\\r\\n\\t  \\r\\n\\t}\\r\\n    
</style>\\r\\n</head>\\r\\n<body >\\r\\n\\r\\n<!-- 此段必须要引入 t为小时级别的时间戳 -->\\r\\n<link type=\"text/css\" href=\"//g.alicdn.com/sd/ncpc/nc.css?t=1492479762603\" rel=\"stylesheet\" />\\r\\n<script type=\"text/javascript\" src=\"//g.alicdn.com/sd/ncpc/nc.js?t=1492479762603\"></script>\\r\\n<div id=\"_umfp\" style=\"display:inline;width:1px;height:1px;overflow:hidden\"></div>\\r\\n\\r\\n<!-- 引入结束 -->\\r\\n<script src=\"../../../Utility/vshopSelector/vshopCodeLogin.js\"></script>\\r\\n<script src=\"../../../Utility/vshopSelector/vshopSelector.js\" type=\"text/javascript\"></script>\\r\\n\\r\\n<div id=\"divLogin\" class=\"pbox\">\\r\\n    <p>手机用户，您好！首次登录需绑定茅台商城账号。</p>\\r\\n    <input type=\"text\" id=\"txtUserName\" class=\"form-control\" placeholder=\"请输入您的用户名\">\\r\\n    <div class=\"input-group\">\\r\\n        <input id=\"txtPassword\" type=\"password\" class=\"form-control\" placeholder=\"请输入您的密码\">\\r\\n        <span class=\"input-group-btn\">\\r\\n            <button class=\"btn btn-default\" type=\"button\" ontouchstart=\"javascript:$(\\'#txtPassword\\').attr(\\'type\\',\\'text\\');\" ontouchend=\"javascript:$(\\'#txtPassword\\').attr(\\'type\\',\\'password\\');\"><span class=\"glyphicon glyphicon-eye-open\"></span></button>\\r\\n        </span>\\r\\n    </div>\\r\\n    <div id=\"codeVib\">\\r\\n        <!--<div style=\"float:left;  \"><input type=\"text\" style=\"width:100px; height:32px;\" name=\"txtCode\" id=\"txtCode\" placeholder=\"请输入验证码\" /></div>\\r\\n        <div style=\"float:left; margin-left:10px; border:1px solid #ccc;\">\\r\\n            <a href=\"javascript:refreshCode();\">\\r\\n                <img id=\"img_txtCode\" src=\"\" alt=\"\" />     <img id=\"imgVerifyCode\" src=\"../VerifyCodeImage.aspx\" alt=\"\" style=\"border-style: none\" />\\r\\n            </a>\\r\\n            <input style=\"visibility:hidden;display: none;\" type=\"text\" id=\"txtCodeMark\" name=\"txtCodeMark\" value=\"0\" />\\r\\n        </div>-->\\r\\n\\r\\n        <div 
class=\"ln\">\\r\\n            <div id=\"dom_id\"></div>\\r\\n        </div>\\r\\n\\r\\n        <input type=\\'hidden\\' id=\\'csessionid\\' name=\\'csessionid\\' />\\r\\n        <input type=\\'hidden\\' id=\\'sig\\' name=\\'sig\\' />\\r\\n        <input type=\\'hidden\\' id=\\'token\\' name=\\'token\\' />\\r\\n        <input type=\\'hidden\\' id=\\'scene\\' name=\\'scene\\' />\\r\\n    </div>\\r\\n    <button type=\"button\" id=\"btnBindUser\" class=\"btn btn-success btn-block\">确认绑定这个账号</button>\\r\\n    <p class=\"text-right\"><a id=\"aRegister\" href=\"javascript:void(0)\">我还没有账号，立刻去注册！</a></p>\\r\\n</div>\\r\\n<div id=\"divRegister\" class=\"pbox\" style=\"display:none;\">\\r\\n    <p>您好！欢迎注册茅台商城会员帐号。</p>\\r\\n    <input type=\"url\" id=\"txtRegisterUserName\" class=\"form-control\" placeholder=\"请输入您的用户名\" onkeyup=\"$(\\'#text_mail\\').val($(this).val());\">\\r\\n    <div class=\"input-group mail\">\\r\\n        <input id=\"text_mail\" type=\"url\" class=\"form-control\" placeholder=\"请输入您的邮箱\">\\r\\n        <div id=\"emailList\" class=\"input-group-btn\"></div>\\r\\n        <input type=\"hidden\" id=\"text_email_domain\" />\\r\\n    </div>\\r\\n    <div class=\"input-group\">\\r\\n        <abbr class=\"formselect\">\\r\\n            <select id=\"typeis\">\\r\\n                <option value=\"0\">选择会员类型</option>\\r\\n                <option value=\"1\">个人</option>\\r\\n                <option value=\"1\">团体</option>\\r\\n            </select>\\r\\n        </abbr>\\r\\n    </div>\\r\\n    <div style=\" clear::both\"></div>\\r\\n    <div class=\"input-group\">\\r\\n        <input id=\"text_password\" type=\"password\" class=\"form-control\" placeholder=\"请输入您的密码\">\\r\\n        <span class=\"input-group-btn\">\\r\\n            <button class=\"btn btn-default\" type=\"button\" ontouchstart=\"javascript:$(\\'#text_password\\').attr(\\'type\\',\\'text\\');\" ontouchend=\"javascript:$(\\'#text_password\\').attr(\\'type\\',\\'password\\');\"><span class=\"glyphicon 
glyphicon-eye-open\"></span></button>\\r\\n        </span>\\r\\n    </div>\\r\\n    <input type=\"text\" id=\"register_txtCellPhone\" class=\"form-control\" placeholder=\"请输入您的手机号码\" />\\r\\n    <div>\\r\\n        <input type=\"text\" style=\"width:40%;float:left;\" id=\"txtRegisterMsgCode\" class=\"form-control\" placeholder=\"请输6位验证码\" /><span style=\"width:40%;float:left; margin-left:5%;\" id=\"a_sendCode\" class=\"codezq\">获取短信</span>\\r\\n    </div>\\r\\n\\r\\n    <button type=\"button\" id=\"btnRegisterUser\" class=\"btn btn-danger btn-block\">注册并登录账号</button>\\r\\n\\r\\n    <p class=\"text-right\"><a id=\"aLogin\" href=\"javascript:void(0)\">我已有账号，立刻去登录！</a></p>\\r\\n</div>\\r\\n<style>\\r\\n    #codeVib {\\r\\n        /*display: none;*/\\r\\n    }\\r\\n\\r\\n\\r\\n    #a_sendCode {\\r\\n        color: white;\\r\\n        padding: 6px 20px;\\r\\n        cursor: pointer;\\r\\n        border: none;\\r\\n        text-align: center;\\r\\n        border-radius: 4px;\\r\\n    }\\r\\n\\r\\n    .codezq {\\r\\n        background-color: #DA4453;\\r\\n    }\\r\\n\\r\\n    .col_gray {\\r\\n        background-color: #cccccc;\\r\\n    }\\r\\n\\r\\n    .colClick {\\r\\n        pointer-events: none;\\r\\n    }\\r\\n\\r\\n    .codeloginzq {\\r\\n        background-color: #8CC152;\\r\\n    }\\r\\n\\r\\n    #btnBindUser {\\r\\n        margin-top: 50px;\\r\\n    }\\r\\n\\r\\n    /*滑动验证*/\\r\\n    #t_i_t_i_l_e_S_t_y_l_e{\\r\\n            border-radius: 10px;\\r\\n    }\\r\\n    .ln,  .ln1 {\\r\\n        padding: 5px 0;\\r\\n    }\\r\\n\\r\\n    .ln .h,.ln1 .h {\\r\\n        display: inline-block;\\r\\n        width: 4em;\\r\\n    }\\r\\n\\r\\n    .ln input,.ln1 input {\\r\\n        border: solid 1px #999;\\r\\n        padding: 5px 8px;\\r\\n    }\\r\\n\\r\\n    .nc-container .clickCaptcha .clickCaptcha_img img {\\r\\n        width: 93%;\\r\\n        height: 230px;\\r\\n        margin-left: 10px;\\r\\n        margin-top: 5px;\\r\\n    }\\r\\n</style>\\r\\n<script>\\r\\n    
//禁用右上角菜单\\r\\n    document.addEventListener(\\'WeixinJSBridgeReady\\', function onBridgeReady() {\\r\\n        WeixinJSBridge.call(\\'hideOptionMenu\\');\\r\\n    });\\r\\n</script>\\r\\n<script type=\"text/javascript\">\\r\\n    ///验证码\\r\\n    function refreshCode() {\\r\\n        var img = document.getElementById(\"imgVerifyCode\");\\r\\n        if (img != null) {\\r\\n            var currentDate = new Date();\\r\\n            img.src = \"../VerifyCodeImage.aspx?t=\" + currentDate.getTime();\\r\\n\\r\\n        }\\r\\n    }\\r\\n\\r\\n    //滑动验证\\r\\n    function slideVerification() {\\r\\n        //滑动验证\\r\\n        var nc = new noCaptcha();\\r\\n        var nc_appkey = \\'FFFF00000000016A8646\\';  // 应用标识,不可更改\\r\\n        var nc_scene = \\'login\\';  //场景,不可更改\\r\\n        var nc_token = [nc_appkey, (new Date()).getTime(), Math.random()].join(\\':\\');\\r\\n        var nc_option = {\\r\\n            renderTo: \\'#dom_id\\',//渲染到该DOM ID指定的Div位置\\r\\n            appkey: nc_appkey,\\r\\n            scene: nc_scene,\\r\\n            token: nc_token,\\r\\n            //trans: \\'{\"name1\":\"code100\"}\\',//测试用，特殊nc_appkey时才生效，正式上线时请务必要删除；code0:通过;code100:点击验证码;code200:图形验证码;code300:恶意请求拦截处理\\r\\n            callback: function (data) {// 校验成功回调\\r\\n                console.log(data);\\r\\n                console.log(data.csessionid);\\r\\n                console.log(data.sig);\\r\\n                console.log(nc_token);\\r\\n\\r\\n                document.getElementById(\\'csessionid\\').value = data.csessionid;\\r\\n                document.getElementById(\\'sig\\').value = data.sig;\\r\\n                document.getElementById(\\'token\\').value = nc_token;\\r\\n                document.getElementById(\\'scene\\').value = nc_scene;\\r\\n            }\\r\\n        };\\r\\n        nc.init(nc_option);\\r\\n        //滑动验证结束\\r\\n    }\\r\\n\\r\\n    //滑动验证\\r\\n    function slideVerificationForReg() {\\r\\n        //滑动验证\\r\\n        var nc = new noCaptcha();\\r\\n  
      var nc_appkey = \\'FFFF00000000016A8646\\';  // 应用标识,不可更改\\r\\n        var nc_scene = \\'register\\';  //场景,不可更改\\r\\n        var nc_token = [nc_appkey, (new Date()).getTime(), Math.random()].join(\\':\\');\\r\\n        var nc_option = {\\r\\n            renderTo: \\'#dom_id1\\',//渲染到该DOM ID指定的Div位置\\r\\n            appkey: nc_appkey,\\r\\n            scene: nc_scene,\\r\\n            token: nc_token,\\r\\n            //trans: \\'{\"name1\":\"code100\"}\\',//测试用，特殊nc_appkey时才生效，正式上线时请务必要删除；code0:通过;code100:点击验证码;code200:图形验证码;code300:恶意请求拦截处理\\r\\n            callback: function (data) {// 校验成功回调\\r\\n                console.log(data);\\r\\n                console.log(data.csessionid);\\r\\n                console.log(data.sig);\\r\\n                console.log(nc_token);\\r\\n\\r\\n                document.getElementById(\\'csessionid1\\').value = data.csessionid;\\r\\n                document.getElementById(\\'sig1\\').value = data.sig;\\r\\n                document.getElementById(\\'token1\\').value = nc_token;\\r\\n                document.getElementById(\\'scene1\\').value = nc_scene;\\r\\n            }\\r\\n        };\\r\\n        nc.init(nc_option);\\r\\n        //滑动验证结束\\r\\n    }\\r\\n\\r\\n    $(document).ready(function () {\\r\\n\\r\\n        slideVerification();\\r\\n\\r\\n        //验证码\\r\\n        // alert($(\"#txtCodeMark\").val());\\r\\n\\r\\n        $(\"#txtCode\").keyup(function () {\\r\\n            var value = $(this).val();\\r\\n            if (value.length < 4) {\\r\\n\\r\\n                temp = \"\";\\r\\n            }\\r\\n            else if (value.length == 4) {\\r\\n                if (temp != value) {\\r\\n\\r\\n                }\\r\\n                temp = value;\\r\\n            }\\r\\n        });\\r\\n\\r\\n        $(\"#btnBindUser\").bind(\"click\", function () { BindUser(); }); //绑定商城账号\\r\\n        $(\"#btnRegisterUser\").bind(\"click\", function () { RegisterUser(); }); //绑定商城账号\\r\\n        $(\"#aRegister\").bind(\"click\", 
function () { $(\"#divLogin\").hide(); $(\"#divRegister\").show(); });\\r\\n        $(\"#aLogin\").bind(\"click\", function () { $(\"#divLogin\").show(); $(\"#divRegister\").hide(); });\\r\\n\\r\\n        var emails = [\\r\\n          { text: \\'@qq.com\\', selected: true },\\r\\n          { text: \\'@163.com\\' },\\r\\n          { text: \\'@126.com\\' },\\r\\n          { text: \\'@sina.com.cn\\' },\\r\\n          { text: \\'@gmail.com\\' },\\r\\n          { text: \\'@sohu.com\\' },\\r\\n          { text: \\'@hotmail.com\\' },\\r\\n          { type: \\'divider\\' },\\r\\n          { text: \\'手动输入\\', href: \"javascript:$(\\'.input-group.mail\\').after($(\\'#text_mail\\'));$(\\'.input-group.mail\\').remove();\" }\\r\\n        ];\\r\\n\\r\\n        $(\\'#emailList\\').vshopSelector({\\r\\n            data: emails, height: \\'34\\',\\r\\n            onchanged: function (item) {\\r\\n                $(\\'#text_email_domain\\').val(item.text);\\r\\n            }\\r\\n        });\\r\\n        $(\\'#emailList .btn-group\\').removeClass(\\'btn-group\\').addClass(\\'input-group-btn\\');\\r\\n\\r\\n    });\\r\\n\\r\\n    function BindUser() {\\r\\n        var username = $.trim($(\"#txtUserName\").val()),\\r\\n           password = $.trim($(\"#txtPassword\").val()),\\r\\n          session = $.trim($(\"#csessionid\").val()),\\r\\n           sig = $.trim($(\"#sig\").val()),\\r\\n            token = $.trim($(\"#token\").val()),\\r\\n             scene = $.trim($(\"#scene\").val());\\r\\n\\r\\n        if (!username || username.length < 2)\\r\\n            alert_h(\\'用户名不能为空并且至少要2个字符\\');\\r\\n\\r\\n        else if (!password || password.length < 6)\\r\\n            alert_h(\\'密码不能为空并且至少要6个字符\\');\\r\\n        else if (session == \"\") {\\r\\n            alert_h(\\'请验证通过以后操作\\');\\r\\n\\r\\n        }\\r\\n        else {\\r\\n\\r\\n\\r\\n            $.ajax({\\r\\n                url: \"/API/VshopProcess.ashx\",\\r\\n                type: \\'post\\', dataType: \\'json\\', timeout: 
10000,\\r\\n                data: { action: \"BindUser\", openId: getParam(\"sessionId\"), userName: username, password: password, session: session, sig: sig, token: token, scene: scene },\\r\\n                success: function (resultData) {\\r\\n\\r\\n                    if (resultData.Status == \"OK\") {\\r\\n                        alert_h(\"登录成功！\", function () {\\r\\n                            if (getParam(\"returnUrl\") != \"\") {\\r\\n                                var str = getParam(\"returnUrl\");\\r\\n\\r\\n                                if (str.indexOf(\"ShoppingCart\") >= 0) {\\r\\n                                    location.href = getParam(\"returnUrl\");\\r\\n\\r\\n                                } else {\\r\\n                                    var url = window.location.href;\\r\\n\\r\\n                                    location.href = url.substr(url.indexOf(\"returnUrl\") + 10);\\r\\n\\r\\n                                }\\r\\n                            }\\r\\n                            else\\r\\n                                location.href = \"Default.aspx\";\\r\\n                        });\\r\\n                    }\\r\\n                    else if (resultData.Status == \"-1\") {\\r\\n                        $(\"#codeVib\").css(\"display\", \"block\");\\r\\n                        $(\"#txtCodeMark\").val(\"1\");\\r\\n                      \\r\\n                        alert_h(\"用户名不存在, 请重试\");\\r\\n                        slideVerification();\\r\\n                    }\\r\\n                    else if (resultData.Status == \"-2\") {\\r\\n                        $(\"#m-username\").val($(\"#txtUserName\").val());\\r\\n\\r\\n                        $(\"#yzsjhWin\").myOpenWin(\\r\\n                        {\\r\\n                            colseBtn: \\' <img src=\"/Utility/image/guanbi-2.png\" />\\',\\r\\n                            mouseOverCloseBtn: \\'/Utility/image/guanbi-1.png\\',\\r\\n                            openStyle: 3,\\r\\n   
                         colseStyle: 3,\\r\\n                            bgColor: \"red\",\\r\\n                            mozopacity: \"5.0\",\\r\\n                            opacity: \".50\",\\r\\n                            filter: \"50\",\\r\\n\\r\\n                        });\\r\\n                    }\\r\\n                    else if (resultData.Status == \"2\") {\\r\\n                        $(\"#txtCodeMark\").val(\"1\");\\r\\n                        $(\"#codeVib\").css(\"display\", \"block\");\\r\\n                      \\r\\n                        alert_h(\"您要绑定的用户已经被系统禁止登录\");\\r\\n                    }\\r\\n\\r\\n\\r\\n                    else if (resultData.Status == \"0\") {\\r\\n                        $(\"#txtCodeMark\").val(\"1\");\\r\\n                        $(\"#codeVib\").css(\"display\", \"block\");\\r\\n                       \\r\\n                        alert_h(\"用户名或密码错误, 请重试\");\\r\\n                        slideVerification();\\r\\n                        return;\\r\\n                    }\\r\\n\\r\\n\\r\\n\\r\\n                    else if (resultData.Status == \"3\") {\\r\\n                        alert_h(\"验证码不正确\");\\r\\n                        slideVerification();\\r\\n\\r\\n                    }\\r\\n                    else if (resultData.Status == \"4\") {\\r\\n                        alert_h(\"操作过于频繁,请稍后再试！\");\\r\\n                        slideVerification();\\r\\n\\r\\n                    }\\r\\n                }\\r\\n            });\\r\\n        }\\r\\n\\r\\n        }\\r\\n      \\r\\n\\r\\n     \\r\\n\\r\\n    function RegisterUser() {\\r\\n\\r\\n        var username = $.trim($(\"#txtRegisterUserName\").val()),\\r\\n           email = $.trim($(\"#text_mail\").val() + ($(\\'.input-group.mail\\').length > 0 ? 
$(\\'#text_email_domain\\').val() : \\'\\')),\\r\\n           password = $.trim($(\"#text_password\").val()), userphone = $(\"#register_txtCellPhone\").val(), usermsgcode = $(\"#txtRegisterMsgCode\").val();\\r\\n\\r\\n        var emailReg = /^([a-zA-Z0-9]+[_|\\\\_|\\\\.]?)*[a-zA-Z0-9]+@([a-zA-Z0-9]+[_|\\\\_|\\\\.]?)*[a-zA-Z0-9]+\\\\.[a-zA-Z]{2,3}$/;\\r\\n        var typeid = 0;\\r\\n        var regphone = /^[1-9]\\\\d{10,11}$/;\\r\\n        //alert_h(userphone.trim().length<=0);\\r\\n        //return false;\\r\\n        typeid = $(\"#typeis\").val();\\r\\n        if (typeid == 0) {\\r\\n            alert_h(\"请选择类型\");\\r\\n            return false;\\r\\n        }\\r\\n        if (!username || username.length < 2) {\\r\\n            alert_h(\\'用户名不能为空并且至少要2个字符\\');\\r\\n            return false;\\r\\n        }\\r\\n        else if (username.trim().indexOf(\" \") != -1) {\\r\\n            alert_h(\\'输入的用户名不能包括空格\\');\\r\\n            return false;\\r\\n\\r\\n\\r\\n        }\\r\\n        else if (!email) {\\r\\n            alert_h(\\'邮箱地址不能为空\\');\\r\\n            return false;\\r\\n        }\\r\\n        else if (!emailReg.test(email)) {\\r\\n            alert_h(\\'邮箱地址格式不正确\\');\\r\\n            return false;\\r\\n        }\\r\\n        else if (!password || password.length < 6) {\\r\\n            alert_h(\\'密码不能为空并且至少要6个字符\\');\\r\\n            return false;\\r\\n        }\\r\\n        else if (userphone.length <= 0) {\\r\\n            alert_h(\\'请输入您的手机号码\\');\\r\\n            return false;\\r\\n        }\\r\\n        else if (!regphone.test(userphone.trim())) {\\r\\n            alert_h(\\'您输入的手机号码不正确\\');\\r\\n            return false;\\r\\n        }\\r\\n        else if (usermsgcode.length <= 0) {\\r\\n            alert_h(\\'请输入您的短信验证码\\');\\r\\n            return false;\\r\\n        }\\r\\n        else {\\r\\n            $.ajax({\\r\\n                url: \"/API/VshopProcess.ashx\",\\r\\n                type: \\'post\\', dataType: \\'json\\', timeout: 
10000,\\r\\n                data: { action: \"RegisterUser\", openId: getParam(\"sessionId\"), userName: username, email: email, password: password, TypeId: typeid, phone: userphone, msgcode: usermsgcode },\\r\\n                success: function (resultData) {\\r\\n                    if (resultData.Status == \"OK\") {\\r\\n                        alert_h(\"注册成功！\", function () {\\r\\n                            if (getParam(\"returnUrl\") != \"\")\\r\\n                                location.href = getParam(\"returnUrl\");\\r\\n                            else\\r\\n                                location.href = \"Default.aspx\";\\r\\n                        });\\r\\n                    } else if (resultData.Status == \"-4\") {\\r\\n                        alert_h(resultData.retmsg);\\r\\n                    }\\r\\n                    else if (resultData.Status == \"-1\") {\\r\\n                        alert_h(\"用户名已被注册过, 请重试\");\\r\\n                    }\\r\\n                    else if (resultData.Status == \"-2\") {\\r\\n                        alert_h(\"邮箱已经被注册过, 请重试\");\\r\\n                    }\\r\\n                    else if (resultData.Status == \"-3\") {\\r\\n                        alert_h(\"您选择的会员类型下面没有对应的类型，请重新选择\");\\r\\n                    }\\r\\n                    else {\\r\\n                        alert_h(\"注册失败, 请重试\");\\r\\n                    }\\r\\n                }\\r\\n            });\\r\\n        }\\r\\n    }\\r\\n</script>\\r\\n\\r\\n\\r\\n<p style=\"display: none\">\\r\\n    <!--<script src=\\'http://pw.cnzz.com/c.php?id=1250329172&l=2\\' language=\\'JavaScript\\' charset=\\'gb2312\\'></script>-->\\r\\n</p>\\r\\n<footer>\\r\\n    <div>\\r\\n        <div id=\"wap-home\">\\r\\n            <a href=\"default.aspx\">\\r\\n\\r\\n                <!--<img src=\"/templates/vshop/default/images/shouye-icon-zc.png\" />-->\\r\\n                <span>首页</span>\\r\\n\\r\\n            </a>\\r\\n        </div>\\r\\n\\r\\n        <div 
id=\"wap-search\">\\r\\n            <a href=\"ProductSearch.aspx\">\\r\\n\\r\\n                <!--<img src=\"/templates/vshop/default/images/sousuo-icon-zc.png\" />-->\\r\\n                <span>搜索</span>\\r\\n\\r\\n            </a>\\r\\n        </div>\\r\\n\\r\\n        <div id=\"wap-gouwuche\">\\r\\n            <a href=\"ShoppingCart.aspx\">\\r\\n\\r\\n                <!--<img src=\"/templates/vshop/default/images/gouwuche-icon-zc.png\" />-->\\r\\n                <span>购物车</span>\\r\\n\\r\\n            </a>\\r\\n        </div>\\r\\n\\r\\n        <div id=\"wap-huiyuanzx\">\\r\\n            <a href=\"MemberCenter.aspx\">\\r\\n\\r\\n                <!--<img src=\"/templates/vshop/default/images/gerenzhongxin-icon-zc.png\" />-->\\r\\n                <span>会员中心</span>\\r\\n\\r\\n            </a>\\r\\n        </div>\\r\\n\\r\\n        <div id=\"wap-return\">\\r\\n\\r\\n\\r\\n            <!--<img src=\"/templates/vshop/default/images/fanhui-icon-zc.png\" />-->\\r\\n            <span>返回</span>\\r\\n\\r\\n\\r\\n        </div>\\r\\n    </div>\\r\\n\\r\\n</footer>\\r\\n\\r\\n<!--<a href=\"default.aspx\">\\r\\n       <div id=\"wap-home\"></div>\\r\\n   </a>\\r\\n   <a href=\"ProductSearch.aspx\">\\r\\n       <div class=\"glyphicon glyphicon-search\"></div>\\r\\n   </a>\\r\\n   <a href=\"ShoppingCart.aspx\">\\r\\n       <div class=\"glyphicon glyphicon-shopping-cart\"></div>\\r\\n   </a>\\r\\n   <a href=\"MemberCenter.aspx\">\\r\\n       <div class=\"glyphicon glyphicon-user\"></div>\\r\\n   </a>\\r\\n   <div class=\"glyphicon glyphicon-refresh\"></div>\\r\\n   <div class=\"glyphicon glyphicon-arrow-left\"></div>-->\\r\\n<script src=\"/templates/vshop/default/script/newWapDefault.js\"></script>\\r\\n<style>\\r\\n    /*底部*/\\r\\n    footer {\\r\\n        background-color: white;\\r\\n        padding: 3px 0px;\\r\\n        border-top:1px solid #999;\\r\\n    }\\r\\n\\r\\n    footer span {\\r\\n        display: block;\\r\\n        color: #A2A2A2;\\r\\n        padding-top: 
25px;\\r\\n        font-size:12px;\\r\\n    }\\r\\n\\r\\n    footer div {\\r\\n        width: 95%;\\r\\n        margin: 0 auto;\\r\\n    }\\r\\n\\r\\n    #wap-home, #wap-search, #wap-gouwuche, #wap-huiyuanzx, #wap-return {\\r\\n        width: 20%;\\r\\n        text-align: center;\\r\\n        float: left;\\r\\n        min-height: 34px;\\r\\n         background-size:25px; \\r\\n         \\r\\n    }\\r\\n\\r\\n    #wap-home {\\r\\n        background-image: url(/templates/vshop/default/images/shouye-icon-zc.png);\\r\\n        background-repeat: no-repeat;\\r\\n        background-position-x: 50%;\\r\\n       \\r\\n    }\\r\\n\\r\\n    #wap-search {\\r\\n        background-image: url(/templates/vshop/default/images/sousuo-icon-zc.png);\\r\\n        background-repeat: no-repeat;\\r\\n        background-position-x: 50%;\\r\\n    }\\r\\n\\r\\n      #wap-gouwuche {\\r\\n        background-image: url(/templates/vshop/default/images/gouwuche-icon-zc.png);\\r\\n        background-repeat: no-repeat;\\r\\n        background-position-x: 50%;\\r\\n       \\r\\n    }\\r\\n\\r\\n        #wap-huiyuanzx {\\r\\n        background-image: url(/templates/vshop/default/images/gerenzhongxin-icon-zc.png);\\r\\n        background-repeat: no-repeat;\\r\\n        background-position-x: 50%;\\r\\n    }\\r\\n\\r\\n          #wap-return {\\r\\n        background-image: url(/templates/vshop/default/images/fanhui-icon-zc.png);\\r\\n        background-repeat: no-repeat;\\r\\n        background-position-x: 50%;\\r\\n    }\\r\\n\\r\\n    #wap-home span:hover, #wap-search span:hover, #wap-gouwuche span:hover, #wap-huiyuanzx span:hover, #wap-return span:hover {\\r\\n        color: #E70F20;\\r\\n    }\\r\\n\\r\\n    #wap-home:hover {\\r\\n        background-image: url(/templates/vshop/default/images/shouye-icon-anxai.png);\\r\\n        background-repeat: no-repeat;\\r\\n        text-align: center;\\r\\n    }\\r\\n\\r\\n    #wap-search:hover {\\r\\n        background-image: 
url(/templates/vshop/default/images/sousuo-icon-anxai.png);\\r\\n        background-repeat: no-repeat;\\r\\n        text-align: center;\\r\\n    }\\r\\n\\r\\n    #wap-gouwuche:hover {\\r\\n        background-image: url(/templates/vshop/default/images/gouwuche-icon-anxai.png);\\r\\n        background-repeat: no-repeat;\\r\\n        text-align: center;\\r\\n    }\\r\\n\\r\\n    #wap-huiyuanzx:hover {\\r\\n        background-image: url(/templates/vshop/default/images/gerenzhongxin-icon-anxia.png);\\r\\n        background-repeat: no-repeat;\\r\\n        text-align: center;\\r\\n    }\\r\\n\\r\\n    #wap-return:hover {\\r\\n        background-image: url(/templates/vshop/default/images/fanhui-icon-anxia.png);\\r\\n        background-repeat: no-repeat;\\r\\n        text-align: center;\\r\\n    }\\r\\n\\r\\n    #wap-return {\\r\\n        cursor: pointer;\\r\\n    }\\r\\n\\r\\n    footer a img {\\r\\n        /*//width: 100%;*/\\r\\n        width: 20px;\\r\\n    }\\r\\n</style>\\r\\n\\r\\n<script type=\"text/javascript\">\\r\\n\\r\\n    //隐藏底部菜单\\r\\n    document.addEventListener(\\'WeixinJSBridgeReady\\', function onBridgeReady() {\\r\\n        WeixinJSBridge.call(\\'hideToolbar\\');\\r\\n    });\\r\\n</script>\\r\\n</body> </html> \\r\\n<link href=\"../../../Utility/vshopSelector/vshopYZPhone.css\" rel=\"stylesheet\" />\\r\\n<script src=\"../../../Utility/common.js\"></script>\\r\\n<script src=\"../../../Utility/jquery.openwin.js\"></script>\\r\\n<script src=\"../../../Utility/vshopSelector/vshopBindCellPhon.js\"></script>\\r\\n<div id=\"yzsjhWin\">\\r\\n    <form id=\"bindPhone\">\\r\\n        <h2>为了您的账号安全,请绑定手机号</h2>\\r\\n        <input readonly placeholder=\"用户名\" type=\"text\" class=\"form-control\" name=\"m-username\" value=\"\" id=\"m-username\" />\\r\\n        <input placeholder=\"手机号码\" type=\"text\" class=\"form-control\" name=\"m-userphone\" id=\"m-userphone\" value=\"\" />\\r\\n        <div>\\r\\n            <input type=\"text\" style=\"width:40%;float:left;\" 
id=\"m-msmcode\" class=\"form-control\" placeholder=\"请输入验证码\" /><span style=\"width:50%;float:left; margin-left:5%;\" id=\"login_sendCode\" class=\"codeloginzq\">获取短信</span>\\r\\n        </div>\\r\\n        <button class=\"form-control\" id=\"savaphone-btn\" type=\"button\">提交信息</button>\\r\\n    </form>\\r\\n</div>\\r\\n\\r\\n<!--汉字验证码的框-->\\r\\n<div id=\"wordCode\">\\r\\n    <h2>请滑动验证</h2>\\r\\n    <div style=\"width:90%; margin:0 auto\" class=\"ln1\">\\r\\n        <div id=\"dom_id1\"></div>\\r\\n    </div>\\r\\n\\r\\n    <input type=\\'hidden\\' id=\\'csessionid1\\' name=\\'csessionid\\' />\\r\\n    <input type=\\'hidden\\' id=\\'sig1\\' name=\\'sig\\' />\\r\\n    <input type=\\'hidden\\' id=\\'token1\\' name=\\'token\\' />\\r\\n    <input type=\\'hidden\\' id=\\'scene1\\' name=\\'scene\\' />\\r\\n    <div id=\"wordCodeBtndiv\">\\r\\n        <span type=\"button\" class=\"codezq\" id=\"wordCodeBtn\">确定</span>\\r\\n    </div>\\r\\n</div>\\r\\n\\r\\n  '"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the raw body returned for `url`. In the saved output this is the\n",
    "# site's login/registration page rather than review data, so the endpoint\n",
    "# presumably requires an authenticated session (TODO confirm).\n",
    "r.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_page(url):\n",
    "    \"\"\"Fetch ``url`` and return its JSON-decoded body.\n",
    "\n",
    "    The request is sent with the notebook-level ``headers`` dict.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    url : str\n",
    "        Address to request.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The decoded JSON value, or ``None`` when the request fails, the server\n",
    "    answers with an HTTP error status, or the body is not valid JSON. The\n",
    "    failing URL is printed in that case.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        # The timeout keeps one unresponsive page from stalling the whole crawl.\n",
    "        r = requests.get(url, headers=headers, timeout=10)\n",
    "        # Must be *called*: the bare attribute reference checks nothing.\n",
    "        r.raise_for_status()\n",
    "        return r.json()\n",
    "    except (requests.RequestException, ValueError):\n",
    "        # RequestException: connection, timeout and HTTP-status errors.\n",
    "        # ValueError: the body could not be decoded as JSON.\n",
    "        # Anything else (e.g. KeyboardInterrupt) is left to propagate so the\n",
    "        # crawl can still be interrupted.\n",
    "        print(url)\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 78%|███████████████████████████████▉         | 78/100 [00:21<00:07,  2.95it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://club.jd.com/comment/productPageComments.action?productId=251837&score=0&sortType=6&page=78&pageSize=10\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████| 100/100 [00:29<00:00,  3.13it/s]\n"
     ]
    }
   ],
   "source": [
    "# Collect the 'comments' entries of every page into one flat list.\n",
    "comments = []\n",
    "# A dedicated loop variable keeps the notebook-level `url` from being overwritten.\n",
    "for page_url in tqdm(jd_comment_urls):\n",
    "    data = get_page(page_url)\n",
    "    try:\n",
    "        comments += data['comments']\n",
    "    except (TypeError, KeyError):\n",
    "        # TypeError: get_page returned None (failed page) or 'comments' is not iterable.\n",
    "        # KeyError: the response has no 'comments' entry.\n",
    "        continue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def _column(field):\n",
    "    \"\"\"Return the value stored under `field` for every entry of `comments`, in order.\"\"\"\n",
    "    return [entry[field] for entry in comments]\n",
    "\n",
    "\n",
    "# Parallel lists, one per comment attribute; index i refers to comments[i].\n",
    "nickname = _column('nickname')\n",
    "content = _column('content')\n",
    "productColor = _column('productColor')\n",
    "referenceName = _column('referenceName')\n",
    "creationTime = _column('creationTime')\n",
    "days = _column('days')\n",
    "referenceTime = _column('referenceTime')\n",
    "score = _column('score')\n",
    "replyCount = _column('replyCount')\n",
    "usefulVoteCount = _column('usefulVoteCount')\n",
    "uselessVoteCount = _column('uselessVoteCount')\n",
    "isMobile = _column('isMobile')\n",
    "userClient = _column('userClient')\n",
    "userClientShow = _column('userClientShow')\n",
    "userLevelName = _column('userLevelName')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████| 984/984 [00:00<00:00, 20485.81it/s]\n"
     ]
    }
   ],
   "source": [
    "# Stamp the output file with the current local date (YYYYMMDD).\n",
    "date = time.strftime('%Y%m%d')\n",
    "# A fresh checkout may not have the output directory yet.\n",
    "os.makedirs('data', exist_ok=True)\n",
    "\n",
    "# Column order of the CSV; no header row is written.\n",
    "columns = [nickname, content, creationTime, referenceName, referenceTime,\n",
    "           userClientShow, userLevelName, days, isMobile, productColor,\n",
    "           replyCount, score, usefulVoteCount, uselessVoteCount, userClient]\n",
    "\n",
    "# Pin the encoding: the platform default may be unable to represent the\n",
    "# comment text and would then raise UnicodeEncodeError mid-write.\n",
    "with open(r'data/jd_comments%s.csv' % (date), 'w', newline='', encoding='utf-8') as f:\n",
    "    writer = csv.writer(f)\n",
    "    # zip(*columns) yields one row per comment from the parallel lists.\n",
    "    for row in tqdm(zip(*columns), total=len(nickname)):\n",
    "        writer.writerow(row)"
   ]
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  },
  "toc": {
   "colors": {
    "hover_highlight": "#DAA520",
    "navigate_num": "#000000",
    "navigate_text": "#333333",
    "running_highlight": "#FF0000",
    "selected_highlight": "#FFD700",
    "sidebar_border": "#EEEEEE",
    "wrapper_background": "#FFFFFF"
   },
   "moveMenuLeft": true,
   "nav_menu": {
    "height": "12px",
    "width": "252px"
   },
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 4,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false,
   "widenNotebook": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
